Overview
Exploration of all the data collected on the presence points + randomly generated background points
A note to anyone who might happen to stumble across this… I am a beginner in R and have had no exposure to similar languages. I don’t know what I’m doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.
Built with ‘r getRversion()’.
Package dependencies
You can load them using the following code which uses a function called ipak. Note this function checks to see if the packages are installed first. The “include=FALSE” supresses the package installation text appearing in the document…
presence points
Read in the presence points
presence <- read.csv("../output/bio/presence_points_without_envdata_relooped_glbathy.csv", header = TRUE)
head(presence)
Update: Who sampled in which year and which month:
year
inst_by_yr <- with(presence, table(year, institutioncode))
write.csv(inst_by_yr, file = "../output/bio/institutioncode_by_yr.csv")
inst_by_yr
institutioncode
year ARC DFOCENARC DFOGulf DFOISDM DFOMTMS DFONL DFOQC MaineDMR NEFSC ROM
1998 1 0 48 1 21 582 3 0 1 0
1999 0 0 89 1 74 503 0 0 0 0
2000 10 0 98 7 58 532 0 0 0 0
2001 0 0 50 1 44 546 0 0 0 0
2002 7 0 103 1 52 493 0 0 0 0
2003 0 0 42 3 42 573 0 0 0 0
2004 0 0 90 2 24 668 177 0 0 0
2005 1 14 108 0 83 472 216 7 0 0
2006 4 11 78 4 80 480 234 6 0 0
2007 1 42 85 1 37 467 233 0 0 2
2008 0 10 81 5 38 477 263 0 0 0
2009 0 10 78 0 30 511 125 3 0 0
2010 1 10 94 0 44 529 109 0 0 0
2011 0 22 77 0 13 479 139 0 0 0
2012 0 8 85 0 10 0 148 0 0 0
2013 2 18 68 0 21 0 127 0 0 0
2014 1 22 101 0 6 0 0 0 0 0
2015 3 12 0 0 0 0 0 0 0 0
month
inst_by_mt <- with(presence, table(month, institutioncode))
write.csv(inst_by_mt, file = "../output/bio/institutioncode_by_mth.csv")
inst_by_mt
institutioncode
month ARC DFOCENARC DFOGulf DFOISDM DFOMTMS DFONL DFOQC MaineDMR NEFSC ROM
1 0 0 0 0 0 263 0 0 0 0
2 0 0 0 0 7 4 0 0 0 0
3 1 0 0 3 342 0 0 0 0 0
4 0 0 0 3 11 732 0 0 1 0
5 1 0 0 0 0 1106 17 8 0 0
6 1 0 0 0 0 1694 24 3 0 0
7 20 19 0 17 304 52 33 0 0 2
8 1 73 7 1 14 0 1735 0 0 0
9 8 23 1380 3 0 6 14 0 0 0
10 0 63 5 3 0 545 2 2 0 0
11 8 1 0 3 0 1811 0 3 0 0
12 0 0 0 1 0 1136 0 0 0 0
environmental correlates
- Get the max, min, an mean values and add into a dataframe
Temperature
temp_depth_max <- max(presence$temp_depth, na.rm = TRUE)
temp_depth_min <- min(presence$temp_depth, na.rm = TRUE)
temp_depth_mean <- mean(presence$temp_depth, na.rm = TRUE)
temp_surface_max <- max(presence$temp_surface, na.rm = TRUE)
temp_surface_min <- min(presence$temp_surface, na.rm = TRUE)
temp_surface_mean <- mean(presence$temp_surface, na.rm = TRUE)
Salinity
sal_depth_max <- max(presence$salinity_depth, na.rm = TRUE)
sal_depth_min <- min(presence$salinity_depth, na.rm = TRUE)
sal_depth_mean <- mean(presence$salinity_depth, na.rm = TRUE)
sal_surface_max <- max(presence$salinity_surface, na.rm = TRUE)
sal_surface_min <- min(presence$salinity_surface, na.rm = TRUE)
sal_surface_mean <- mean(presence$salinity_surface, na.rm = TRUE)
Chl
chl_depth_max <- max(presence$chl_depth, na.rm = TRUE)
chl_depth_min <- min(presence$chl_depth, na.rm = TRUE)
chl_depth_mean <- mean(presence$chl_depth, na.rm = TRUE)
chl_surface_max <- max(presence$chl_surface, na.rm = TRUE)
chl_surface_min <- min(presence$chl_surface, na.rm = TRUE)
chl_surface_mean <- mean(presence$chl_surface, na.rm = TRUE)
O2
o2_depth_max <- max(presence$o2_depth, na.rm = TRUE)
o2_depth_min <- min(presence$o2_depth, na.rm = TRUE)
o2_depth_mean <- mean(presence$o2_depth, na.rm = TRUE)
o2_surface_max <- max(presence$o2_surface, na.rm = TRUE)
o2_surface_min <- min(presence$o2_surface, na.rm = TRUE)
o2_surface_mean <- mean(presence$o2_surface, na.rm = TRUE)
MLP
mlp_surface_max <- max(presence$mlp_surface, na.rm = TRUE)
mlp_surface_min <- min(presence$mlp_surface, na.rm = TRUE)
mlp_surface_mean <- mean(presence$mlp_surface, na.rm = TRUE)
SSH
ssh_surface_max <- max(presence$ssh_surface, na.rm = TRUE)
ssh_surface_min <- min(presence$ssh_surface, na.rm = TRUE)
ssh_surface_mean <- mean(presence$ssh_surface, na.rm = TRUE)
create matrix
td <- c(temp_depth_max, temp_depth_min, temp_depth_mean)
ts <- c(temp_surface_max, temp_surface_min, temp_surface_mean)
sd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
ss <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
cd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
cs <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
od <- c(o2_depth_max, o2_depth_min, o2_depth_mean)
os <- c(o2_surface_max, o2_surface_min, o2_surface_mean)
mlp <- c(mlp_surface_max, mlp_surface_min, mlp_surface_mean)
ssh <- c(ssh_surface_max, ssh_surface_min, ssh_surface_mean)
env_stats <- rbind(td, ts, sd, ss, cd, cs, od, os, mlp, ssh)
row.names(env_stats) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH")
colnames(env_stats) <- c("Max", "Min", "Mean")
write.csv(env_stats, "../output/env/env_correlates_basic_stats.csv", row.names = TRUE)
env_stats
Max Min Mean
Temp Depth 289.0834045 271.2114868 274.9217164
Temp Surface 293.3608093 271.6101074 280.4476878
Salinity Depth 35.5045395 22.1449070 33.4295117
Salinity Surface 34.3667259 13.5929441 30.2518319
Chl Depth 35.5045395 22.1449070 33.4295117
Chl Surface 34.3667259 13.5929441 30.2518319
Oxygen Depth 377.2959595 0.9998450 279.6701444
Oxygen Surface 385.2262878 238.3178253 315.1620514
MLP 108.2339201 10.6823719 18.1911518
SSH -0.2825949 -0.8503166 -0.5334292
Hmm some potentially suspicious values here in - Oxygen Depth min - mlp Max and maybe also - Salinity Depth min - Salinity Surface min - CHL depth min - CHL depth max
lets just see where these values appear and check the netcdf layers. Note I will check these values in cygwin using the cdo operator infov on the netcdfs (to ensure there was no issue with processing in R).
get the year and month these values appeared in
o2d_check_yr <- subset(presence$year, presence$o2_depth == o2_depth_min)
o2d_check_mth <- subset(presence$month, presence$o2_depth == o2_depth_min)
O2 depth min is in 2005_08
And value seems correct
mlp_check_yr <- subset(presence$year, presence$mlp_surface == mlp_surface_max)
mlp_check_mth <- subset(presence$month, presence$mlp_surface == mlp_surface_max)
mlp max in in 2007_12
And yes the value seems correct
sald_check_yr <- subset(presence$year, presence$salinity_depth == sal_depth_max)
sald_check_mth <- subset(presence$month, presence$salinity_depth == sal_depth_max)
year month 2001_05
also seems correct
sals_check_yr <- subset(presence$year, presence$salinity_surface == sal_surface_min)
sals_check_mth <- subset(presence$month, presence$salinity_surface == sal_surface_min)
year month 21998_06
ok again…
Leave the rest
simple plots of env. correlates
temperature
par(mfrow=c(1,2))
plot(presence$temp_depth, main = "Temp at Depth (Various)", ylab = "Kelvin")
plot(presence$temp_surface, main = "Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

Hmm not necessarily very helpful. But anyway, lets carry on
salinity
par(mfrow=c(1,2))
plot(presence$salinity_depth, main = "Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(presence$salinity_surface, main = "Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

Chlorophyll
par(mfrow=c(1,2))
plot(presence$chl_depth, main = "Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$chl_surface, main = "Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

O2
par(mfrow=c(1,2))
plot(presence$o2_depth, main = "Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$o2_surface, main = "Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

mlp
plot(presence$mlp_surface, main = "Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

SSH
plot(presence$ssh_surface, main = "Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

XXX SAM - YOU WANT TO THINK ABOUT DOING THIS BY DEPTH RATHER THAN ALL DEPTHS)
unique_depths <- unique(presence$depthlayerno)
unique_depthdordered <- sort(unique_depths) #just puts the list in oder with no NA
length(unique_depthdordered)
[1] 39
ouch - 39 layers… a job for a boxplot probably
frequency plots of env. corr
temperature
hist(presence$temp_surface, main = "Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(presence$temp_dept, main = "Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

chl
hist(presence$chl_surface, main = "Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(presence$chl_depth, main = "Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

salinity
hist(presence$salinity_surface, main = "Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(presence$salinity_depth, main = "Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

oxygen
hist(presence$o2_surface, main = "Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(presence$o2_depth, main = "Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

MLP
hist(presence$mlp, main = "Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ssh
hist(presence$ssh, main = "Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

simple boxplots of env. corr
Same as above but with boxplots (may provide some more useful info)
individual plots of each variable
par(mfrow=c(1,2))
boxplot(presence$chl_depth, main = "Chlorphyll at Depth (Various)", ylab = "mmol.m-3")
boxplot(presence$chl_surface, main = "Chlorphyll at Surface", ylab = "mmol.m-3")
dev.copy(png, "../output/env/simple_plots/chl_boxplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$temp_depth ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$chl_depth ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$salinity_depth ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$o2_depth ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$temp_depth ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$chl_depth ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$salinity_depth ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$o2_depth ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$mlp ~ presence$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Year")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$ssh ~ presence$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Year")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$temp_surface ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$chl_surface ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$salinity_surface ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$o2_surface ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$temp_surface ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$chl_surface ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$salinity_surface ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(presence$o2_surface ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

background points
Read in the presence points
background <- read.csv("../output/bio/background_complete_obs_cels_globot.csv", header = TRUE)
head(background)
environmental correlates
- Get the max, min, an mean values and add into a dataframe
Temperature
temp_depth_max_back <- max(background$temp_depth, na.rm = TRUE)
temp_depth_min_back <- min(background$temp_depth, na.rm = TRUE)
temp_depth_mean_back <- mean(background$temp_depth, na.rm = TRUE)
temp_surface_max_back <- max(background$temp_surface, na.rm = TRUE)
temp_surface_min_back <- min(background$temp_surface, na.rm = TRUE)
temp_surface_mean_back <- mean(background$temp_surface, na.rm = TRUE)
Salinity
sal_depth_max_back <- max(background$salinity_depth, na.rm = TRUE)
sal_depth_min_back <- min(background$salinity_depth, na.rm = TRUE)
sal_depth_mean_back <- mean(background$salinity_depth, na.rm = TRUE)
sal_surface_max_back <- max(background$salinity_surface, na.rm = TRUE)
sal_surface_min_back <- min(background$salinity_surface, na.rm = TRUE)
sal_surface_mean_back <- mean(background$salinity_surface, na.rm = TRUE)
Chl
chl_depth_max_back <- max(background$chl_depth, na.rm = TRUE)
chl_depth_min_back <- min(background$chl_depth, na.rm = TRUE)
chl_depth_mean_back <- mean(background$chl_depth, na.rm = TRUE)
chl_surface_max_back <- max(background$chl_surface, na.rm = TRUE)
chl_surface_min_back <- min(background$chl_surface, na.rm = TRUE)
chl_surface_mean_back <- mean(background$chl_surface, na.rm = TRUE)
O2
o2_depth_max_back <- max(background$o2_depth, na.rm = TRUE)
o2_depth_min_back <- min(background$o2_depth, na.rm = TRUE)
o2_depth_mean_back <- mean(background$o2_depth, na.rm = TRUE)
o2_surface_max_back <- max(background$o2_surface, na.rm = TRUE)
o2_surface_min_back <- min(background$o2_surface, na.rm = TRUE)
o2_surface_mean_back <- mean(background$o2_surface, na.rm = TRUE)
MLP
mlp_surface_max_back <- max(background$mlp_surface, na.rm = TRUE)
mlp_surface_min_back <- min(background$mlp_surface, na.rm = TRUE)
mlp_surface_mean_back <- mean(background$mlp_surface, na.rm = TRUE)
SSH
ssh_surface_max_back <- max(background$ssh_surface, na.rm = TRUE)
ssh_surface_min_back <- min(background$ssh_surface, na.rm = TRUE)
ssh_surface_mean_back <- mean(background$ssh_surface, na.rm = TRUE)
create matrix
tdb <- c(temp_depth_max_back, temp_depth_min_back, temp_depth_mean_back)
tsb <- c(temp_surface_max_back, temp_surface_min_back, temp_surface_mean_back)
sdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
ssb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
cdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
csb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
odb <- c(o2_depth_max_back, o2_depth_min_back, o2_depth_mean_back)
osb <- c(o2_surface_max_back, o2_surface_min_back, o2_surface_mean_back)
mlpb <- c(mlp_surface_max_back, mlp_surface_min_back, mlp_surface_mean_back)
sshb <- c(ssh_surface_max_back, ssh_surface_min_back, ssh_surface_mean_back)
env_stats_back <- rbind(tdb, tsb, sdb, ssb, cdb, csb, odb, osb, mlpb, sshb)
row.names(env_stats_back) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH")
colnames(env_stats_back) <- c("Max", "Min", "Mean")
write.csv(env_stats_back, "../output/env/env_correlates_background_basic_stats.csv", row.names = TRUE)
env_stats_back
Max Min Mean
Temp Depth 298.0517000 270.2798000 277.2492004
Temp Surface 298.2931000 271.0403000 278.9040097
Salinity Depth 36.2885400 12.8384380 33.3354335
Salinity Surface 35.9200800 12.0846500 32.3867768
Chl Depth 36.2885400 12.8384380 33.3354335
Chl Surface 35.9200800 12.0846500 32.3867768
Oxygen Depth 460.5706177 0.8061745 301.0655723
Oxygen Surface 410.5555000 207.4744000 315.9223887
MLP 2266.5013152 8.5859600 37.7989169
SSH -0.1283472 -1.2465040 -0.6931972
temperature
par(mfrow=c(1,2))
plot(background$temp_depth, main = "Background Points Temp at Depth (Various)", ylab = "Kelvin")
plot(background$temp_surface, main = "Background Points Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
salinity
par(mfrow=c(1,2))
plot(background$salinity_depth, main = "Background Points Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(background$salinity_surface, main = "Background Points Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

Chlorophyll
par(mfrow=c(1,2))
plot(background$chl_depth, main = "Background Points Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$chl_surface, main = "Background Points Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

O2
par(mfrow=c(1,2))
plot(background$o2_depth, main = "Background Points Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$o2_surface, main = "Background Points Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

mlp
plot(background$mlp_surface, main = "Background Points Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

SSH
plot(background$ssh_surface, main = "Background Points Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

frequency plots of env. corr
temperature
hist(background$temp_surface, main = "Background Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(background$temp_dept, main = "Background Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

chl
hist(background$chl_surface, main = "Background Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(background$chl_depth, main = "Background Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

salinity
hist(background$salinity_surface, main = "Background Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(background$salinity_depth, main = "Background Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

oxygen
hist(background$o2_surface, main = "Background Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

hist(background$o2_depth, main = "Background Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

MLP
hist(background$mlp, main = "Background Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ssh
hist(background$ssh, main = "Background Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

simple boxplots of env. corr
Same as above but with boxplots (may provide some more useful info)
temperature
boxplot(background$temp_depth ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$chl_depth ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$salinity_depth ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$o2_depth ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$temp_depth ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$chl_depth ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$salinity_depth ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$o2_depth ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$mlp ~ background$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Year")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$ssh ~ background$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Year")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$temp_surface ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$chl_surface ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$salinity_surface ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$o2_surface ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$temp_surface ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$chl_surface ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$salinity_surface ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

boxplot(background$o2_surface ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

correlations between background points
check to see if there are any correlations in the env. variables for the background points
first subset the env.correlate columns (you don’t need everything)
background_env <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
background_env_cor <- cor(background_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_env_cor, "../output/env/background_env_corr.csv", row.names = TRUE)
background_corrplot <- corrplot(background_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8)
dev.copy(png,"../output/env/simple_plots/background_envcorr.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)
par(mfrow=c(4,4))
for(i in 1:16){
plot(density(background_env[,i], na.rm=T), main = names(background_env)[i])
}

PUT THE CHUNK OUTPUT BACK TO INLINE
add nafo region and gear type into the mix
first subset the env.correlate columns + bottom_depth (you don’t need everything)
background_envbotdepth <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
background_envbotdepth_cor <- cor(background_envbotdepth, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_envbotdepth_cor, "../output/env/background_envbotdepth_cor.csv", row.names = TRUE)
background_corrplot <- corrplot(background_envbotdepth_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8)
dev.copy(png,"../output/env/simple_plots/background_envbotdepth_cor.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

correlations between presence points
check to see if there are any correlations in the env. variables
first subset the env.correlate columns (you don’t need everything) then use cor to get the correlation values, and then corrplot for a visual
pres_env <- subset(presence, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
pres_env_cor <- cor(pres_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(pres_env_cor, "../output/env/pres_env_corr.csv", row.names = TRUE)
pres_corrplot <- corrplot(pres_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8)
dev.copy(png,"../output/env/simple_plots/pres_envcorr.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)
graphics.off()
tiff("../output/env/simple_plots/background_envcorr_density.tiff") # to automatically save the plot to a png AND show it inline
par(mfrow=c(4,4), mar=c(1,1,1,1))
for(i in 1:16){
plot(density(pres_env[,i], na.rm=T), main = names(pres_env)[i])
}
dev.off() # stops automatic saving of the plot to a png
null device
1
PUT THE CHUNK OUTPUT BACK TO INLINE
density plot with background and presence env. data
Inspired by Merrow 2013 - top paragraph of page 1063 (are the species observations uniformly distributed over the background, or are they skewed)
ggplot(pres_env, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/mlp_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

ggplot(pres_env, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/ssh_backvspres.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

NAFO Regions
compare the environmental correlates between different NAFO regions
first see which nafo zones were sampled in each year
nafo_by_yr <- with(presence, table(year, nafo_zone))
write.csv(nafo_by_yr, file = "../output/bio/nafozone_by_yr.csv")
nafo_by_yr
nafo_zone
year 0A 0B 1C 2G 2H 2J 3K 3L 3M 3N 3O 3Pn 3Ps 4R 4S 4T 4Vn 4Vs 4W 4X 5Y 5Ze HudsonStrait
1998 0 0 0 0 12 56 113 230 0 44 69 3 58 0 0 70 9 7 4 3 1 0 0
1999 0 0 0 0 0 35 103 212 0 45 50 2 60 0 0 87 11 35 28 2 2 0 0
2000 0 0 0 0 0 55 116 238 2 52 53 4 48 0 0 99 12 26 20 1 0 0 0
2001 0 0 0 0 3 35 126 214 0 52 56 2 61 0 0 52 6 28 11 0 0 0 0
2002 0 0 0 0 0 36 71 214 0 50 62 4 60 0 0 110 8 28 16 1 0 0 0
2003 0 0 0 0 0 50 66 232 0 63 76 3 89 0 0 44 8 20 15 1 0 0 0
2004 0 0 0 0 8 60 209 194 0 58 66 7 69 40 70 160 10 11 5 1 0 0 0
2005 0 7 0 7 0 49 92 198 0 37 54 1 42 50 112 166 27 23 30 4 7 0 0
2006 3 4 0 3 14 49 175 178 0 25 26 0 19 28 132 148 11 39 31 9 7 1 0
2007 0 13 0 3 0 36 84 173 0 56 57 1 62 28 122 172 10 19 11 0 1 0 26
2008 3 7 0 0 6 49 82 162 0 46 57 0 77 64 129 157 5 18 13 3 0 2 0
2009 0 0 0 1 0 45 100 172 0 54 67 8 66 18 63 130 7 1 23 0 3 0 9
2010 0 9 1 0 6 34 111 207 0 62 63 3 50 16 56 134 12 18 14 0 0 0 0
2011 0 9 0 1 5 36 80 161 0 70 51 12 64 32 58 127 7 6 2 0 0 0 12
2012 0 7 0 1 0 0 0 0 0 0 0 0 0 34 71 133 5 1 3 1 0 0 0
2013 0 3 0 6 1 0 0 2 0 0 0 0 0 27 58 113 15 2 4 0 0 0 8
2014 1 16 0 1 0 0 0 1 0 0 0 0 0 0 0 101 6 0 0 0 0 0 4
2015 0 8 0 1 0 0 0 2 0 0 0 0 1 0 0 0 0 0 0 0 0 0 3
and by month
nafo_by_mth <- with(presence, table(nafo_zone, month))
write.csv(nafo_by_mth, file = "../output/bio/nafozone_by_mth.csv")
nafo_by_mth
month
nafo_zone 1 2 3 4 5 6 7 8 9 10 11 12
0A 0 0 0 0 0 0 0 3 0 3 1 0
0B 0 0 0 0 0 0 0 54 23 6 0 0
1C 0 0 0 0 0 0 0 1 0 0 0 0
2G 0 0 0 0 0 0 16 8 0 0 0 0
2H 0 0 0 0 0 0 2 0 0 50 0 3
2J 6 0 0 0 0 0 0 0 0 125 381 113
3K 255 4 0 0 0 0 0 2 0 0 610 657
3L 2 0 0 0 105 1410 83 1 0 64 764 361
3M 0 0 0 0 0 0 0 0 0 2 0 0
3N 0 0 0 0 308 254 0 0 0 110 40 2
3O 0 0 0 28 522 31 1 0 6 194 24 1
3Pn 0 0 0 42 8 0 0 0 0 0 0 0
3Ps 0 0 0 662 163 0 0 0 0 0 1 0
4R 0 0 0 0 0 0 18 318 1 0 0 0
4S 0 0 0 0 2 10 5 852 2 0 0 0
4T 0 0 0 0 15 14 10 570 1387 7 0 0
4Vn 0 0 0 1 0 0 145 8 15 0 0 0
4Vs 0 0 178 2 0 0 99 3 0 0 0 0
4W 0 6 163 11 0 0 44 4 0 1 1 0
4X 0 0 3 0 0 0 20 0 0 2 1 0
5Y 0 0 0 1 9 3 3 0 0 2 3 0
5Ze 0 1 2 0 0 0 0 0 0 0 0 0
HudsonStrait 0 0 0 0 0 0 1 7 0 54 0 0
Interesting that there is a point in 1C - this is outside Canadian waters…. anyway
density plot with background and presence env. data by NAFO region
What I want to see if if there is a marked difference between the env. correlates of the presence points between NAFO regions. This is also to try deal with sampling bias (b/c the whole region is not uniformly sampled in each month, but rather nafo regions have a strong month bias)
first create NAFO-region datasets
nafo0a <- subset(presence, nafo_zone == "0A")
nafo0b <- subset(presence, nafo_zone == "0B")
nafo1c <- subset(presence, nafo_zone == "1C")
nafo2g <- subset(presence, nafo_zone == "2G")
nafo2h <- subset(presence, nafo_zone == "2H")
nafo2j <- subset(presence, nafo_zone == "2J")
nafo3k <- subset(presence, nafo_zone == "3K")
nafo3l <- subset(presence, nafo_zone == "3L")
nafo3m <- subset(presence, nafo_zone == "3M")
nafo3n <- subset(presence, nafo_zone == "3N")
nafo3o <- subset(presence, nafo_zone == "3O")
nafo3ps <- subset(presence, nafo_zone == "3Ps")
nafo4r <- subset(presence, nafo_zone == "4R")
nafo4s <- subset(presence, nafo_zone == "4S")
nafo4t <- subset(presence, nafo_zone == "4T")
nafo4vn <- subset(presence, nafo_zone == "4Vn")
nafo4vs <- subset(presence, nafo_zone == "4Vs")
nafo4w <- subset(presence, nafo_zone == "4W")
nafo4x <- subset(presence, nafo_zone == "4X")
nafo5Y <- subset(presence, nafo_zone == "5Ze")
nafohudson <- subset(presence, nafo_zone == "HudsonStrait")
plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)
ggplot(nafo0a, aes(x = o2_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/o2_depth_nafo.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

Let’s plot the variables by nafo region/year then by month
pr98 <- subset(presence, year == "1998")
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
pr98 <- subset(presence, year == "1998")
pr11 <- subset(presence, year == "2011")
par(mfrow=c(2,1))
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
boxplot(pr11$temp_depth ~ pr11$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
ok scrap the nafo region analysis - it is too mixed up with month (so seeing if region or month is a factor that needs to be factored in to the model is not appropriate)
remap who sampled
Now lets check the number of records and spatial-temporal distribution of the observations by institution code to make sure none are dodgy
first a table of how many observations each instituioncode has
obs_by_ins <- count(presence, "institutioncode")
obs_by_ins
write.csv(obs_by_ins, file = "../output/bio/samplinginstitutions/no_observations_institutioncode.csv")
ok so IMR, NAFO, ICES, & USM have very few entries (1, 1, 1, and 3 respectively)
Lets take a look at the spatial breakdown of these institutions.First all points…
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(38, 70), asp = 1, main = "All Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(presence$decimalLongitude, presence$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/all_occurrences.png") #this prints a png of the plot
dev.off() #this turns off the print command
Note there is one point up by iceland that you should get rid of (Icelandic population thought to be seperate from Labrador, but it is unclear if this is true or not).
Map the institutioncode == ARC only data…
ARC_obs <- presence[presence$institutioncode == "ARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Arc Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ARC_obs$decimalLongitude, ARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
DFOCENARC_obs <- presence[presence$institutioncode == "DFOCENARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Central & Arctic Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOCENARC_obs$decimalLongitude, DFOCENARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOCENARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
DFOGulf_obs <- presence[presence$institutioncode == "DFOGulf", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Gulf Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOGulf_obs$decimalLongitude, DFOGulf_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOGulf_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
DFOISDM_obs <- presence[presence$institutioncode == "DFOISDM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO ISDM Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOISDM_obs$decimalLongitude, DFOISDM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOISDM_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
DFOMTMS_obs <- presence[presence$institutioncode == "DFOMTMS", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Maritimes Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOMTMS_obs$decimalLongitude, DFOMTMS_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOMTMS_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
DFONL_obs <- presence[presence$institutioncode == "DFONL", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Newfouundland & Labrador Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFONL_obs$decimalLongitude, DFONL_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFONL_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
DFOQC_obs <- presence[presence$institutioncode == "DFOQC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Quebec Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOQC_obs$decimalLongitude, DFOQC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOQC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
MaineDMR_obs <- presence[presence$institutioncode == "MaineDMR", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Maine DMR Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(MaineDMR_obs$decimalLongitude, MaineDMR_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/MaineDMR_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
NEFSC_obs <- presence[presence$institutioncode == "NEFSC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "NEFSC Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(NEFSC_obs$decimalLongitude, NEFSC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/NEFSC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command
ROM_obs <- presence[presence$institutioncode == "ROM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Royal Ontario Museum Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ROM_occurrences_all.png") #this prints a png of the plot
check for gear type
what are the unique gear types you have in your presence data, and how many?
so the vast majority are trawls of some type.
map the gear usage in Arcgis (gear_type_map)
create a table of gear use by month
gearby_mth
gear
month bottom_trawl bottom_trawl_alfredo_03 bottom_trawl_campelen_14 bottom_trawl_campelen_1800 bottom_trawl_campelen_21 bottom_trawl_cosmos_2600 bottom_trawl_western_IIA unknown
1 0 0 0 263 0 0 0 0
2 7 0 0 4 0 0 0 0
3 342 0 0 0 0 0 0 1
4 12 0 0 732 0 0 0 0
5 8 0 0 1106 0 0 0 18
6 3 0 0 1694 0 0 0 25
7 304 0 18 64 1 0 0 43
8 14 0 34 1193 36 3 7 543
9 0 0 0 12 23 0 1380 16
10 2 3 0 545 0 60 5 2
11 3 0 0 1811 0 1 0 8
12 0 0 0 1136 0 0 0 0
gear
month vertical_plankton_tow
1 0
2 0
3 3
4 3
5 0
6 0
7 17
8 1
9 3
10 3
11 3
12 1
What I want to see if if there is a marked difference between the env. correlates of the presence points between gear type used. This is also to try deal with detection bias between gear type (b/c the whole region is not uniformly sampled by the same gear type)
first create gear datasets
presence$gear <- as.character(presence$gear)
bottom_trawl <- subset(presence, gear == "bottom_trawl")
bottom_trawl_alfredo_03 <- subset(presence, gear == "bottom_trawl_alfredo_03")
bottom_trawl_campelen_14 <- subset(presence, gear == "bottom_trawl_campelen_14")
bottom_trawl_campelen_1800 <- subset(presence, gear == "bottom_trawl_campelen_1800")
bottom_trawl_campelen_21 <- subset(presence, gear == "bottom_trawl_campelen_21")
bottom_trawl_cosmos_2600 <- subset(presence, gear == "bottom_trawl_cosmos_2600")
bottom_trawl_western_IIA <- subset(presence, gear == "bottom_trawl_western_IIA")
unknown <- subset(presence, gear == "unknown")
vertical_plankton_tow <- subset(presence, gear == "vertical_plankton_tow")
plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)
ggplot(bottom_trawl, aes(x = o2_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/o2_depth_gear.png") # to automatically save the plot to a png AND show it inline
png
3
dev.off() # stops automatic saving of the plot to a png
png
2

par(mar=c(7,5,1,1))
boxplot(presence$temp_depth ~ presence$gear, ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth by gear type", las = 2)
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
looks like not much difference… what about a kruskal wallace test?
kruskal.test(presence$temp_depth ~ presence$gear)
Ok so there is a statistically sig difference somewhere in the temp at depth reported by the gear type (Kruskal-Wallis chi-squared = 248.27, df = 7, p-value < 2.2e-16)
To see where the difference(s) are run a Dunn test Zar (2010) states that the Dunn test (in the FSA package) is appropriate for groups with unequal numbers of observations.(Zar, J.H. 2010. Biostatistical Analysis, 5th ed. Pearson Prentice Hall: Upper Saddle River, NJ.) http://rcompanion.org/rcompanion/d_06.html
SAM WHEN DUNN IS INSTALLED, GO TO RCOMPANIONS.ORG AND LOOK FOR DUNN - CHECK CHROME HISTORY (PROBABLY UNDER KRUSKAL WALLIS) ALSO MAP THE DISTRIBUTION OF THESE GEAR TYPES
pairwise.wilcox.test(presence$temp_depth, presence$gear, p.adj='bonferroni', exact=F)
dunn test
gear_temp_depthdunn <- dunnTest(presence$temp_depth, presence$gear, list = TRUE)
gear_temp_depthdunn
write.csv(table, "../output/env/gear_temp_depthdunn.csv", row.names = TRUE)
vif
for this you need the joined dataset. this was created under mergig_datasets.src and the file is ../output/bio/presab.csv
presab <- read.csv("../output/bio/presab.csv", header = TRUE)
vif_allpresab <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allpresab, "../output/bio/vif_allpresab.csv", row.names = TRUE)
vif_allpresab
interpret - see https://stats.stackexchange.com/questions/70679/which-variance-inflation-factor-should-i-be-using-textgvif-or-textgvif/96584#96584 To make GVIFs comparable across dimensions, we suggested using GVIF^(1/(2Df)), where Df is the number of coefficients in the subset (ref fox and motette 1992 in zotero) or the 2 continuous variables, GVIFˆ(1/(2Df)) (which is basically the square root of the VIF/GVIF value as DF=1) is the proportional change of the standard error and confidence interval of their coefficients due to the level of collinearity. The GVIFˆ(1/(2Df)) value of the categorical variable is a similar measure for the reduction in precision of the coefficients’ estimation due to collinearity. apparently i just need to square GVIF^(1/(2Df)) and then use the normal VIF “rule of thumb”…
vif_allpresab_sq <- read.csv("../output/bio/vif_allpresab.csv", header = TRUE)
vif_allpresab_sq$GVIF2Dfsq <- vif_allpresab_sq$GVIF..1..2.Df..^2
write.csv(vif_allpresab_sq, "../output/bio/vif_allpresab_sq.csv", row.names = TRUE)
vif_allpresab_sq
As per SLR suggestion, rerun without gear
vif_allbutgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutgear, "../output/bio/vif_allbutgear.csv", row.names = TRUE)
vif_allbutgear
As per SLR suggestion, rerun without gear and most highly correlated variables
vif_allbutgearhighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutgearhighlycorr, "../output/bio/vif_allbutgearhighlycorr.csv", row.names = TRUE)
vif_allbutgearhighlycorr
As per SLR suggestion, rerun with gear but without most highly correlated variables
vif_allbuthighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter + gear, data = presab))
write.csv(vif_allbuthighlycorr, "../output/bio/vif_allbuthighlycorr.csv", row.names = TRUE)
vif_allbuthighlycorr
vif_allbuthighlycorr_sq <- read.csv("../output/bio/vif_allbuthighlycorr.csv", header = TRUE)
vif_allbuthighlycorr_sq$GVIF2Dfsq <- vif_allbuthighlycorr_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighlycorr_sq, "../output/bio/vif_allbuthighlycorr_sq.csv", row.names = TRUE)
vif_allbuthighlycorr_sq
ok remove one variable at a time - leave gear in
remove amo_prev
vif_allbutamo_prev <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prev, "../output/bio/vif_allbutamo_prev.csv", row.names = TRUE)
vif_allbutamo_prev
vif_allbutamo_prev_sq <- read.csv("../output/bio/vif_allbutamo_prev.csv", header = TRUE)
vif_allbutamo_prev_sq$GVIF2Dfsq <- vif_allbutamo_prev_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutamo_prev_sq, "../output/bio/vif_allbutamo_prev_sq.csv", row.names = TRUE)
vif_allbutamo_prev_sq
and leave gear out
remove amo_prev + gear
vif_allbutamo_prevgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prevgear, "../output/bio/vif_allbutamo_prevgear.csv", row.names = TRUE)
vif_allbutamo_prevgear
remove chl_depth
vif_allbutchl_depth <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depth, "../output/bio/vif_allbutchl_depth.csv", row.names = TRUE)
vif_allbutchl_depth
vif_allbutchl_depth_sq <- read.csv("../output/bio/vif_allbutchl_depth.csv", header = TRUE)
vif_allbutchl_depth_sq$GVIF2Dfsq <- vif_allbutchl_depth_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutchl_depth_sq, "../output/bio/vif_allbutchl_depth_sq.csv", row.names = TRUE)
vif_allbutchl_depth_sq
remove chl_depth and gear
vif_allbutchl_depthgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depthgear, "../output/bio/vif_allbutchl_depthgear.csv", row.names = TRUE)
vif_allbutchl_depthgear
remove ssh_surface
vif_allbutssh_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surface, "../output/bio/vif_allbutssh_surface.csv", row.names = TRUE)
vif_allbutssh_surface
vif_allbutssh_surface_sq <- read.csv("../output/bio/vif_allbutssh_surface.csv", header = TRUE)
vif_allbutssh_surface_sq$GVIF2Dfsq <- vif_allbutssh_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutssh_surface_sq, "../output/bio/vif_allbutssh_surface_sq.csv", row.names = TRUE)
vif_allbutssh_surface_sq
remove ssh_surface & gear
vif_allbutssh_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surfacegear, "../output/bio/vif_allbutssh_surfacegear.csv", row.names = TRUE)
vif_allbutssh_surfacegear
remove o2_surface
vif_allbuto2_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surface, "../output/bio/vif_allbuto2_surface.csv", row.names = TRUE)
vif_allbuto2_surface
vif_allbuto2_surface_sq <- read.csv("../output/bio/vif_allbuto2_surface.csv", header = TRUE)
vif_allbuto2_surface_sq$GVIF2Dfsq <- vif_allbuto2_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuto2_surface_sq, "../output/bio/vif_allbuto2_surface_sq.csv", row.names = TRUE)
vif_allbuto2_surface_sq
remove o2_surface & gear
vif_allbuto2_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surfacegear, "../output/bio/vif_allbuto2_surfacegear.csv", row.names = TRUE)
vif_allbuto2_surfacegear
as per SLR chat jan 23:
remove salinity_surface only
vif_allbutsalinitysurface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurface, "../output/bio/vif_allbutsalinitysurface.csv", row.names = TRUE)
vif_allbutsalinitysurface
vif_allbutsalinitysurface_sq <- read.csv("../output/bio/vif_allbutsalinitysurface.csv", header = TRUE)
vif_allbutsalinitysurface_sq$GVIF2Dfsq <- vif_allbutsalinitysurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutsalinitysurface_sq, "../output/bio/vif_allbutsalinitysurface_sq.csv", row.names = TRUE)
vif_allbutsalinitysurface_sq
and without gear
vif_allbutsalinitysurfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurfacegear, "../output/bio/vif_allbutsalinitysurfacegear.csv", row.names = TRUE)
vif_allbutsalinitysurfacegear
now remove temp_surface plus highly correlated
#with gear
vif_allbuthighcorrtempsurface <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurface, "../output/bio/vif_allbuthighcorrtempsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface
vif_allbuthighcorrtempsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsurface.csv", header = TRUE)
vif_allbuthighcorrtempsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsurface_sq, "../output/bio/vif_allbuthighcorrtempsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface_sq
#without gear
vif_allbuthighcorrtempsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurfacegear, "../output/bio/vif_allbuthighcorrtempsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsurfacegear
now remove temp_surface plus salinity_surface
#with gear
vif_allbuttempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurface, "../output/bio/vif_allbuttempsalsurface.csv", row.names = TRUE)
vif_allbuttempsalsurface
vif_allbuttempsalsurface_sq <- read.csv("../output/bio/vif_allbuttempsalsurface.csv", header = TRUE)
vif_allbuttempsalsurface_sq$GVIF2Dfsq <- vif_allbuttempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuttempsalsurface_sq, "../output/bio/vif_allbuttempsalsurface_sq.csv", row.names = TRUE)
vif_allbuttempsalsurface_sq
#without gear
vif_allbuttempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurfacegear, "../output/bio/vif_allbuttempsalsurfacegear.csv", row.names = TRUE)
vif_allbuttempsalsurfacegear
now remove temp_surface plus salinity_surface + highly correlated
#with gear
vif_allbuthighcorrtempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurface, "../output/bio/vif_allbuthighcorrtempsalsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface
vif_allbuthighcorrtempsalsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsalsurface.csv", header = TRUE)
vif_allbuthighcorrtempsalsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsalsurface_sq, "../output/bio/vif_allbuthighcorrtempsalsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface_sq
#without gear
vif_allbuthighcorrtempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurfacegear, "../output/bio/vif_allbuthighcorrtempsalsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurfacegear
Just out of curiosity, a vif will all but but atmos drivers
vif_allbutnaoamo <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear, data = presab))
write.csv(vif_allbutnaoamo, "../output/bio/vif_allbutnaoamo.csv", row.names = TRUE)
vif_allbutnaoamo
vif_allbutnaoamo_sq <- read.csv("../output/bio/vif_allbutnaoamo.csv", header = TRUE)
vif_allbutnaoamo_sq$GVIF2Dfsq <- vif_allbutnaoamo_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutnaoamo_sq, "../output/bio/vif_allbutnaoamo_sq.csv", row.names = TRUE)
vif_allbutnaoamo_sq
check to see how many cells have >1 point per timeslice
this needs to be 3D so you need to create a cell_id_3d
presab$cell_id_3d <- paste(presab$cell_id, presab$depthlayerno, sep="_") #create a new column that is a unique cell_id & depth ID.
head(presab)
and now do one with a timeslice label
write.csv(presab, "../output/bio/../output/bio/presab_cellid_xyzt.csv", row.names = FALSE)
cannot open file '../output/bio/../output/bio/presab_cellid_xyzt.csv': No such file or directoryError in file(file, ifelse(append, "a", "w")) :
cannot open the connection
because background points can land in the same xyzt, subset the dataset to be only presences
presencedup <- subset(presab, occurrence == "1")
nrow(presencedup)
[1] 11393
the data is in presencedup$total_cell_obs_xyzt
unique_pointpercell <- unique(presencedup$total_cell_obs_xyzt)
unique_pointpercell
[1] 1 3 2 4 5 6 15
bugger… ok so lets do a frequency table
so there are 949 3d timesliced cells that contain more than one occurrence, and 2042 rows. - different sampling days - different coordinates - risk of duplicate entries (but only if two instituions inputted the same data with slightly different coordinates and different reserch program names - see cell3985_18_2000_7 below as example)
cell3985_18_2000_7 <- subset(celobsmore1, cell_id_xyzt == "3985_18_2000_7")
cell3985_18_2000_7
Ideally you only want one observation cell cell_id_xyzt. First see if any are missing depth data
find the column numbers that correspond to the env. data (use fastmatch package as is quicker)
fmatch("chl_surface", names(celobsmore1)) #28
[1] 28
fmatch("chl_depth", names(celobsmore1)) #29
[1] 29
fmatch("mlp_surface", names(celobsmore1)) #30
[1] 30
fmatch("o2_surface", names(celobsmore1)) #31
[1] 31
fmatch("o2_depth", names(celobsmore1)) #32
[1] 32
fmatch("salinity_surface", names(celobsmore1)) #33
[1] 33
fmatch("salinity_depth", names(celobsmore1)) #34
[1] 34
fmatch("ssh_surface", names(celobsmore1)) #35
[1] 35
fmatch("temp_surface", names(celobsmore1)) #36
[1] 36
fmatch("temp_depth", names(celobsmore1)) #37
[1] 37
then subset any with NA values
celobsmore1_novals <- celobsmore1[!complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_novals) #just to check how many there are - 1095 out of 2042
[1] 1095
ok and subset with all values
celobsmore1_allvals <- celobsmore1[complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_allvals) #947 out of 2042
[1] 947
ok so how many unique cells do we have with complete data…
pointpercellcount_alldata <- count(celobsmore1_allvals, "total_cell_obs_xyzt")
pointpercellcount_alldata$no_cells <- pointpercellcount_alldata$freq / pointpercellcount_alldata$total_cell_obs_xyzt
write.csv(pointpercellcount_alldata, file = "../output/bio/pointpercellcount_alldata.csv")
pointpercellcount_alldata
and see which cell_id_xyzt appear in both celobsmore1_allvals and celobsmore1_novals
celobsmore1_allnovals <- inner_join(celobsmore1_allvals, celobsmore1_novals)
Joining, by = c("cell_id", "year", "month", "depthlayerno", "id", "decimalLatitude", "decimalLongitude", "datecollected", "institutioncode", "individualcount", "depth", "resname", "originalscientificname", "collectioncode", "day", "occurrence", "nafo_zone", "gear", "longitude_meters", "latitude_meters", "amo_sample", "amo_prev", "amo_winter", "depth_layer", "bottom_depth", "total_cell_obs_xy", "total_cell_obs_xyt", "chl_surface", "chl_depth", "mlp_surface", "o2_surface", "o2_depth", "salinity_surface", "salinity_depth", "ssh_surface", "temp_surface", "temp_depth", "nao_sample", "nao_prev", "nao_winter", "total_cell_obs_xyzt", "temp_celsius_depth", "temp_celsius_surface", "bottom_depth_glorys", "cell_id_3d", "cell_id_xyzt")
celobsmore1_allnovals
None…. which is good. I’d expect observations in the same xyzt to have the same values
so, I can successfully reduce the observations down to one per xyzt
obs_cell_yymm_depth_dup <- count(presab, cell_id, year, month, depthlayerno))
Error: unexpected ')' in "obs_cell_yymm_depth_dup <- count(presab, cell_id, year, month, depthlayerno))"
monthly spearmans correlations and VIF
curious - does correlations/vif alter between months?
First split the dataset into monthly
janpresab <- subset(presab, month == "1")
febpresab <- subset(presab, month == "2")
marpresab <- subset(presab, month == "3")
aprpresab <- subset(presab, month == "4")
maypresab <- subset(presab, month == "5")
junpresab <- subset(presab, month == "6")
julpresab <- subset(presab, month == "7")
augpresab <- subset(presab, month == "8")
seppresab <- subset(presab, month == "9")
octpresab <- subset(presab, month == "10")
novpresab <- subset(presab, month == "11")
decpresab <- subset(presab, month == "12")
now get the background points for each month (for spearmans)
janback <- subset(janpresab, occurrence == "0")
febback <- subset(febpresab, occurrence == "0")
marback <- subset(marpresab, occurrence == "0")
aprback <- subset(aprpresab, occurrence == "0")
mayback <- subset(maypresab, occurrence == "0")
junback <- subset(junpresab, occurrence == "0")
julback <- subset(julpresab, occurrence == "0")
augback <- subset(augpresab, occurrence == "0")
sepback <- subset(seppresab, occurrence == "0")
octback <- subset(octpresab, occurrence == "0")
novback <- subset(novpresab, occurrence == "0")
decback <- subset(decpresab, occurrence == "0")
run the correlations on each month’s background points
janback <- subset(janback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
janback_cor <- cor(janback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(janback_cor, "../output/env/janback_cor.csv", row.names = TRUE)
febback <- subset(febback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
febback_cor <- cor(febback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(febback_cor, "../output/env/febback_cor.csv", row.names = TRUE)
marback <- subset(marback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
marback_cor <- cor(marback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(marback_cor, "../output/env/marback_cor.csv", row.names = TRUE)
aprback <- subset(aprback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
aprback_cor <- cor(aprback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(aprback_cor, "../output/env/aprback_cor.csv", row.names = TRUE)
mayback <- subset(mayback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
mayback_cor <- cor(mayback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(mayback_cor, "../output/env/mayback_cor.csv", row.names = TRUE)
junback <- subset(junback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
junback_cor <- cor(junback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(junback_cor, "../output/env/junback_cor.csv", row.names = TRUE)
julback <- subset(julback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
julback_cor <- cor(julback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(julback_cor, "../output/env/julback_cor.csv", row.names = TRUE)
augback <- subset(augback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
augback_cor <- cor(augback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(augback_cor, "../output/env/augback_cor.csv", row.names = TRUE)
sepback <- subset(sepback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
sepback_cor <- cor(sepback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(sepback_cor, "../output/env/sepback_cor.csv", row.names = TRUE)
octback <- subset(octback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
octback_cor <- cor(octback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(octback_cor, "../output/env/octback_cor.csv", row.names = TRUE)
novback <- subset(novback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
novback_cor <- cor(novback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(novback_cor, "../output/env/novback_cor.csv", row.names = TRUE)
decback <- subset(decback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
decback_cor <- cor(decback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(decback_cor, "../output/env/decback_cor.csv", row.names = TRUE)
maxent?
library("raster")
library("dismo")
library("rgeos")
---
title: "Data Exploration"
author: "Samantha Andrews"
output: 
  html_notebook: 
    fig_height: 7
    fig_width: 10
editor_options: 
  chunk_output_type: inline
---

# Overview
Exploration of all the data collected on the presence points + randomly generated background points

A note to anyone who might happen to stumble across this... I am a beginner in R and have had no exposure to similar languages. I don't know what I'm doing. The code herein is unlikely to be elegant and there are probably more efficient ways of running the code.

Built with 'r getRversion()'.

# Package dependencies
You can load them using the following code which uses a function called [ipak](https://gist.github.com/stevenworthington/3178163). 
Note this function checks to see if the packages are installed first.
The "include=FALSE" supresses the package installation text appearing in the document...
```{r pre-install packages, include=FALSE}
packages <- c("plyr", "graphics", "ggplot2", "corrplot", "FSA", "car", "rworldmap", "fastmatch") 
source("../src/ipak.R")
ipak(packages)
```


# presence points
Read in the presence points

```{r}
presence <- read.csv("../output/bio/presence_points_without_envdata_relooped_glbathy.csv", header = TRUE)
head(presence)
```

## Update: Who sampled in which year and which month:

year
```{r}
inst_by_yr <- with(presence, table(year, institutioncode))
write.csv(inst_by_yr, file = "../output/bio/institutioncode_by_yr.csv")
inst_by_yr
```

month
```{r}
inst_by_mt <- with(presence, table(month, institutioncode))
write.csv(inst_by_mt, file = "../output/bio/institutioncode_by_mth.csv")
inst_by_mt
```

## environmental correlates

1) Get the max, min, an mean values and add into a dataframe

Temperature
```{r}
temp_depth_max <- max(presence$temp_depth, na.rm = TRUE)
temp_depth_min <- min(presence$temp_depth, na.rm = TRUE)
temp_depth_mean <- mean(presence$temp_depth, na.rm = TRUE)
temp_surface_max <- max(presence$temp_surface, na.rm = TRUE)
temp_surface_min <- min(presence$temp_surface, na.rm = TRUE)
temp_surface_mean <- mean(presence$temp_surface, na.rm = TRUE)
```

Salinity
```{r}
sal_depth_max <- max(presence$salinity_depth, na.rm = TRUE)
sal_depth_min <- min(presence$salinity_depth, na.rm = TRUE)
sal_depth_mean <- mean(presence$salinity_depth, na.rm = TRUE)
sal_surface_max <- max(presence$salinity_surface, na.rm = TRUE)
sal_surface_min <- min(presence$salinity_surface, na.rm = TRUE)
sal_surface_mean <- mean(presence$salinity_surface, na.rm = TRUE)
```

Chl
```{r}
chl_depth_max <- max(presence$chl_depth, na.rm = TRUE)
chl_depth_min <- min(presence$chl_depth, na.rm = TRUE)
chl_depth_mean <- mean(presence$chl_depth, na.rm = TRUE)
chl_surface_max <- max(presence$chl_surface, na.rm = TRUE)
chl_surface_min <- min(presence$chl_surface, na.rm = TRUE)
chl_surface_mean <- mean(presence$chl_surface, na.rm = TRUE)
```

O2
```{r}
o2_depth_max <- max(presence$o2_depth, na.rm = TRUE)
o2_depth_min <- min(presence$o2_depth, na.rm = TRUE)
o2_depth_mean <- mean(presence$o2_depth, na.rm = TRUE)
o2_surface_max <- max(presence$o2_surface, na.rm = TRUE)
o2_surface_min <- min(presence$o2_surface, na.rm = TRUE)
o2_surface_mean <- mean(presence$o2_surface, na.rm = TRUE)
```

MLP
```{r}
mlp_surface_max <- max(presence$mlp_surface, na.rm = TRUE)
mlp_surface_min <- min(presence$mlp_surface, na.rm = TRUE)
mlp_surface_mean <- mean(presence$mlp_surface, na.rm = TRUE)
```

SSH
```{r}
ssh_surface_max <- max(presence$ssh_surface, na.rm = TRUE)
ssh_surface_min <- min(presence$ssh_surface, na.rm = TRUE)
ssh_surface_mean <- mean(presence$ssh_surface, na.rm = TRUE)
```

create matrix
```{r}
td <- c(temp_depth_max, temp_depth_min, temp_depth_mean)
ts <- c(temp_surface_max, temp_surface_min, temp_surface_mean)
sd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
ss <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
cd <- c(sal_depth_max, sal_depth_min, sal_depth_mean)
cs <- c(sal_surface_max, sal_surface_min, sal_surface_mean)
od <- c(o2_depth_max, o2_depth_min, o2_depth_mean)
os <- c(o2_surface_max, o2_surface_min, o2_surface_mean)
mlp <- c(mlp_surface_max, mlp_surface_min, mlp_surface_mean)
ssh <- c(ssh_surface_max, ssh_surface_min, ssh_surface_mean)
env_stats <- rbind(td, ts, sd, ss, cd, cs, od, os, mlp, ssh)
row.names(env_stats) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH") 
colnames(env_stats) <- c("Max", "Min", "Mean")
write.csv(env_stats, "../output/env/env_correlates_basic_stats.csv", row.names = TRUE)
env_stats
```

Hmm some potentially suspicious values here in
- Oxygen Depth min
- mlp  Max
and maybe also
- Salinity Depth min
- Salinity Surface min
- CHL depth min
- CHL depth max

lets just see where these values appear and check the netcdf layers. Note I will check these values in cygwin using the cdo operator infov on the netcdfs (to ensure there was no issue with processing in R). 


get the year and month these values appeared in
```{r}
o2d_check_yr <- subset(presence$year, presence$o2_depth == o2_depth_min)
o2d_check_mth <- subset(presence$month, presence$o2_depth == o2_depth_min)
```
O2 depth min is in 2005_08

And value seems correct

```{r}
mlp_check_yr <- subset(presence$year, presence$mlp_surface == mlp_surface_max)
mlp_check_mth <- subset(presence$month, presence$mlp_surface == mlp_surface_max)
```
mlp max in in 2007_12

And yes the value seems correct

```{r}
sald_check_yr <- subset(presence$year, presence$salinity_depth == sal_depth_max)
sald_check_mth <- subset(presence$month, presence$salinity_depth == sal_depth_max)
```
year month 2001_05

also seems correct

```{r}
sals_check_yr <- subset(presence$year, presence$salinity_surface == sal_surface_min)
sals_check_mth <- subset(presence$month, presence$salinity_surface == sal_surface_min)
```
year month 21998_06

ok again...

Leave the rest


## simple plots of env. correlates

temperature
```{r}
par(mfrow=c(1,2))
plot(presence$temp_depth, main = "Temp at Depth (Various)", ylab = "Kelvin")
plot(presence$temp_surface, main = "Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```
Hmm not necessarily very helpful. But anyway, lets carry on

salinity
```{r}
par(mfrow=c(1,2))
plot(presence$salinity_depth, main = "Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(presence$salinity_surface, main = "Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

Chlorophyll
```{r}
par(mfrow=c(1,2))
plot(presence$chl_depth, main = "Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$chl_surface, main = "Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

O2
```{r}
par(mfrow=c(1,2))
plot(presence$o2_depth, main = "Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(presence$o2_surface, main = "Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

mlp
```{r}
plot(presence$mlp_surface, main = "Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

SSH
```{r}
plot(presence$ssh_surface, main = "Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

XXX SAM - YOU WANT TO THINK ABOUT DOING THIS BY DEPTH RATHER THAN ALL DEPTHS)

```{r}
unique_depths <- unique(presence$depthlayerno)
unique_depthdordered <- sort(unique_depths) #just puts the list in oder with no NA
length(unique_depthdordered)
```
ouch - 39 layers... a job for a boxplot probably

```{r}

```



# frequency plots of env. corr


temperature
```{r}
hist(presence$temp_surface, main = "Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$temp_dept, main = "Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

chl
```{r}
hist(presence$chl_surface, main = "Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$chl_depth, main = "Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

salinity
```{r}
hist(presence$salinity_surface, main = "Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$salinity_depth, main = "Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

oxygen
```{r}
hist(presence$o2_surface, main = "Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(presence$o2_depth, main = "Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

MLP
```{r}
hist(presence$mlp, main = "Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

ssh
```{r}
hist(presence$ssh, main = "Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


## simple boxplots of env. corr

Same as above but with boxplots (may provide some more useful info)



individual plots of each variable 
```{r}
par(mfrow=c(1,2))
boxplot(presence$temp_depth, main = "Temperature at Depth (Various)", ylab = "Kelvin")
boxplot(presence$temp_surface, main = "Temperature at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temp_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$salinity_depth, main = "Salinity at Depth (Various)", ylab = "PSU")
boxplot(presence$salinity_surface, main = "Salinity at Surface", ylab = "PSU")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$o2_depth, main = "O2 at Depth (Various)", ylab = "mmol.m-3")
boxplot(presence$o2_surface, main = "O2 at Surface", ylab = "mmol.m-3")
dev.copy(png, "../output/env/simple_plots/o2_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$chl_depth, main = "Chlorphyll at Depth (Various)", ylab = "mmol.m-3")
boxplot(presence$chl_surface, main = "Chlorphyll at Surface", ylab = "mmol.m-3")
dev.copy(png, "../output/env/simple_plots/chl_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

par(mfrow=c(1,2))
boxplot(presence$mlp_surface, main = "Density Mixed Layer Thickness", ylab = "meters")
boxplot(presence$ssh_surface, main = "Sea Surface Height", ylab = "meters")
dev.copy(png, "../output/env/simple_plots/mlp_ssh_boxplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_depth ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_depth ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_depth ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_depth ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_depth ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_depth ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_depth ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_depth ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$mlp ~ presence$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Year")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$ssh ~ presence$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Year")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_surface ~ presence$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_surface ~ presence$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_surface ~ presence$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_surface ~ presence$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$mlp_surface ~ presence$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Observation per Month")
dev.copy(png, "../output/env/simple_plots/mlp_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$ssh_surface ~ presence$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Observation per Month")
dev.copy(png, "../output/env/simple_plots/ssh_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$temp_surface ~ presence$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$chl_surface ~ presence$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$salinity_surface ~ presence$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(presence$o2_surface ~ presence$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Observation (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


# background points
Read in the presence points

```{r}
background <- read.csv("../output/bio/background_complete_obs_cels_globot.csv", header = TRUE)
head(background)
```

# environmental correlates

1) Get the max, min, an mean values and add into a dataframe

Temperature
```{r}
temp_depth_max_back <- max(background$temp_depth, na.rm = TRUE)
temp_depth_min_back <- min(background$temp_depth, na.rm = TRUE)
temp_depth_mean_back <- mean(background$temp_depth, na.rm = TRUE)
temp_surface_max_back <- max(background$temp_surface, na.rm = TRUE)
temp_surface_min_back <- min(background$temp_surface, na.rm = TRUE)
temp_surface_mean_back <- mean(background$temp_surface, na.rm = TRUE)
```

Salinity
```{r}
sal_depth_max_back <- max(background$salinity_depth, na.rm = TRUE)
sal_depth_min_back <- min(background$salinity_depth, na.rm = TRUE)
sal_depth_mean_back <- mean(background$salinity_depth, na.rm = TRUE)
sal_surface_max_back <- max(background$salinity_surface, na.rm = TRUE)
sal_surface_min_back <- min(background$salinity_surface, na.rm = TRUE)
sal_surface_mean_back <- mean(background$salinity_surface, na.rm = TRUE)
```

Chl
```{r}
chl_depth_max_back <- max(background$chl_depth, na.rm = TRUE)
chl_depth_min_back <- min(background$chl_depth, na.rm = TRUE)
chl_depth_mean_back <- mean(background$chl_depth, na.rm = TRUE)
chl_surface_max_back <- max(background$chl_surface, na.rm = TRUE)
chl_surface_min_back <- min(background$chl_surface, na.rm = TRUE)
chl_surface_mean_back <- mean(background$chl_surface, na.rm = TRUE)
```

O2
```{r}
o2_depth_max_back <- max(background$o2_depth, na.rm = TRUE)
o2_depth_min_back <- min(background$o2_depth, na.rm = TRUE)
o2_depth_mean_back <- mean(background$o2_depth, na.rm = TRUE)
o2_surface_max_back <- max(background$o2_surface, na.rm = TRUE)
o2_surface_min_back <- min(background$o2_surface, na.rm = TRUE)
o2_surface_mean_back <- mean(background$o2_surface, na.rm = TRUE)
```

MLP
```{r}
mlp_surface_max_back <- max(background$mlp_surface, na.rm = TRUE)
mlp_surface_min_back <- min(background$mlp_surface, na.rm = TRUE)
mlp_surface_mean_back <- mean(background$mlp_surface, na.rm = TRUE)
```

SSH
```{r}
ssh_surface_max_back <- max(background$ssh_surface, na.rm = TRUE)
ssh_surface_min_back <- min(background$ssh_surface, na.rm = TRUE)
ssh_surface_mean_back <- mean(background$ssh_surface, na.rm = TRUE)
```

create matrix
```{r}
tdb <- c(temp_depth_max_back, temp_depth_min_back, temp_depth_mean_back)
tsb <- c(temp_surface_max_back, temp_surface_min_back, temp_surface_mean_back)
sdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
ssb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
cdb <- c(sal_depth_max_back, sal_depth_min_back, sal_depth_mean_back)
csb <- c(sal_surface_max_back, sal_surface_min_back, sal_surface_mean_back)
odb <- c(o2_depth_max_back, o2_depth_min_back, o2_depth_mean_back)
osb <- c(o2_surface_max_back, o2_surface_min_back, o2_surface_mean_back)
mlpb <- c(mlp_surface_max_back, mlp_surface_min_back, mlp_surface_mean_back)
sshb <- c(ssh_surface_max_back, ssh_surface_min_back, ssh_surface_mean_back)
env_stats_back <- rbind(tdb, tsb, sdb, ssb, cdb, csb, odb, osb, mlpb, sshb)
row.names(env_stats_back) <- c("Temp Depth", "Temp Surface", "Salinity Depth", "Salinity Surface", "Chl Depth", "Chl Surface", "Oxygen Depth", "Oxygen Surface", "MLP", "SSH") 
colnames(env_stats_back) <- c("Max", "Min", "Mean")
write.csv(env_stats_back, "../output/env/env_correlates_background_basic_stats.csv", row.names = TRUE)
env_stats_back
```

temperature
```{r}
par(mfrow=c(1,2))
plot(background$temp_depth, main = "Background Points Temp at Depth (Various)", ylab = "Kelvin")
plot(background$temp_surface, main = "Background Points Temp at Surface", ylab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


salinity
```{r}
par(mfrow=c(1,2))
plot(background$salinity_depth, main = "Background Points Salinity at Depth (Various)", ylab = "Practical Salinity Units")
plot(background$salinity_surface, main = "Background Points Salinity at Surface", ylab = "Practical Salinity Units")
dev.copy(png,"../output/env/simple_plots/salinity_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

Chlorophyll
```{r}
par(mfrow=c(1,2))
plot(background$chl_depth, main = "Background Points Chl at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$chl_surface, main = "Background Points Chl at Surface", ylab = "Chl (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/chl_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

O2
```{r}
par(mfrow=c(1,2))
plot(background$o2_depth, main = "Background Points Dissolved Oxygen at Depth (Various)", ylab = "Chl (mmol.m-3)")
plot(background$o2_surface, main = "Background Points Dissolved Oxygen at Surface", ylab = "O2 (mmol.m-3)")
dev.copy(png,"../output/env/simple_plots/o2_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

mlp
```{r}
plot(background$mlp_surface, main = "Background Points Mixed Layer Thickness", ylab = "Depth (m)")
dev.copy(png,"../output/env/simple_plots/mlp_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

SSH
```{r}
plot(background$ssh_surface, main = "Background Points Sea Surface Height", ylab = "Height (m)")
dev.copy(png,"../output/env/simple_plots/ssh_background_simpleplot.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

## frequency plots of env. corr


temperature
```{r}
hist(background$temp_surface, main = "Background Temp at Surface", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$temp_dept, main = "Background Temp at Depth", xlab = "Kelvin")
dev.copy(png, "../output/env/simple_plots/temperature_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

chl
```{r}
hist(background$chl_surface, main = "Background Chlorophyll at Surface", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$chl_depth, main = "Background Chlorophyll at Observation Depth", xlab = "Chlorophyll Concentrations (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/chl_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

salinity
```{r}
hist(background$salinity_surface, main = "Background Salinity at Surface", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$salinity_depth, main = "Background Salinity at Observation Depth", xlab = "Salinity Practical Salinity Unit (PSU)")
dev.copy(png, "../output/env/simple_plots/sal_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

oxygen
```{r}
hist(background$o2_surface, main = "Background Dissolved Oxygen at Surface", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
hist(background$o2_depth, main = "Background Dissolved Oxygen at Observation Depth", xlab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)")
dev.copy(png, "../output/env/simple_plots/o2_background_depth_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

MLP
```{r}
hist(background$mlp, main = "Background Mixed Layer Thickness at Observation", xlab = "Mixed Layer Thickness (m)")
dev.copy(png, "../output/env/simple_plots/mlp_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

ssh
```{r}
hist(background$ssh, main = "Background Sea Surface Height at Observation", xlab = "Sea Surface Height (m)")
dev.copy(png, "../output/env/simple_plots/ssh_background_surface_histogram.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

## simple boxplots of env. corr

Same as above but with boxplots (may provide some more useful info)

temperature


```{r}
boxplot(background$temp_depth ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_depth ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_depth ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_depth ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$temp_depth ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_depth ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_depth ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_depth ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background Depth per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$mlp ~ background$year, xlab = "Year", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Year")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$ssh ~ background$year, xlab = "Year", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Year")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_dept_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$temp_surface ~ background$month, xlab = "month", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_surface ~ background$month, xlab = "month", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/chlorophyll_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_surface ~ background$month, xlab = "month", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_surface ~ background$month, xlab = "month", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Month")
dev.copy(png, "../output/env/simple_plots/o2_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$mlp_surface ~ background$month, xlab = "month", ylab = "Mixed Layer Thickness (m)", main = "Mixed Layer Thickness at Background per Month")
dev.copy(png, "../output/env/simple_plots/mlp_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$ssh_surface ~ background$month, xlab = "month", ylab = "Sea Surface Height (m)", main = "Sea Surface Height at Background per Month")
dev.copy(png, "../output/env/simple_plots/ssh_background_boxplot_surface_month.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$temp_surface ~ background$year, xlab = "Year", ylab = "Temperature (Kelvin)", main = "Temperature at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/temperature_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$chl_surface ~ background$year, xlab = "Year", ylab = "Chlorophyll Concentrations (mmol.m-3)", main = "Chlorophyll at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/chl_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$salinity_surface ~ background$year, xlab = "Year", ylab = "Salinity Practical Salinity Unit (PSU)", main = "Salinity at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/salinity_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
boxplot(background$o2_surface ~ background$year, xlab = "Year", ylab = "Mole Concentration of Dissolved Oxygen in Sea Water (mmol.m-3)", main = "Dissolved Oxygen at Background (Surface) per Year")
dev.copy(png, "../output/env/simple_plots/oxygen_background_boxplot_surface_year.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

## correlations between background points

check to see if there are any correlations in the env. variables for the background points


first subset the env.correlate columns (you don't need everything)

```{r}
background_env <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
background_env_cor <- cor(background_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_env_cor, "../output/env/background_env_corr.csv", row.names = TRUE)
background_corrplot <- corrplot(background_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/background_envcorr.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)
```{r}
par(mfrow=c(4,4))
for(i in 1:16){
  plot(density(background_env[,i], na.rm=T), main = names(background_env)[i])
}
```
PUT THE CHUNK OUTPUT BACK TO INLINE


add nafo region and gear type into the mix


first subset the env.correlate columns + bottom_depth (you don't need everything)

```{r}
background_envbotdepth <- subset(background, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
background_envbotdepth_cor <- cor(background_envbotdepth, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(background_envbotdepth_cor, "../output/env/background_envbotdepth_cor.csv", row.names = TRUE)
background_corrplot <- corrplot(background_envbotdepth_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/background_envbotdepth_cor.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


## correlations between presence points

check to see if there are any correlations in the env. variables

first subset the env.correlate columns (you don't need everything) then use cor to get the correlation values, and then corrplot for a visual

```{r}
pres_env <- subset(presence, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth, bottom_depth))
pres_env_cor <- cor(pres_env, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(pres_env_cor, "../output/env/pres_env_corr.csv", row.names = TRUE)
pres_corrplot <- corrplot(pres_env_cor , method = "color", type = "upper", order = "alphabet", tl.cex = 0.8) 
dev.copy(png,"../output/env/simple_plots/pres_envcorr.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

to get some density plots all in one graphic, you need to change chunk output to in console, then go to the plot tab, make it fill the screen, then run then make bigger then save :( (probably a better way - this seems to be an issue with RStudio)
```{r}
par(mfrow=c(4,4))
for(i in 1:16){
  plot(density(pres_env[,i], na.rm=T), main = names(pres_env)[i])
}
```
PUT THE CHUNK OUTPUT BACK TO INLINE


## density plot with background and presence env. data

Inspired by Merrow 2013 - top paragraph of page 1063 (are the species observations uniformly distributed over the background, or are they skewed)

```{r}
ggplot(pres_env, aes(x = temp_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = temp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/temp_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = chl_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = chl_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/chl_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = salinity_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = salinity_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/salinity_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = o2_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_surface_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = o2_depth)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/o2_depth_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = mlp_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/mlp_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

```{r}
ggplot(pres_env, aes(x = ssh_surface)) + geom_density(na.rm = TRUE, colour = "red") + geom_density(data=background_env, na.rm = TRUE, colour = "blue")
dev.copy(png,"../output/env/simple_plots/ssh_backvspres.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```


# NAFO Regions

compare the environmental correlates between different NAFO regions

first see which nafo zones were sampled in each year
```{r presence nafo by year}
nafo_by_yr <- with(presence, table(year, nafo_zone))
write.csv(nafo_by_yr, file = "../output/bio/nafozone_by_yr.csv")
nafo_by_yr
```

and by month
```{r prsence nafo by month}
nafo_by_mth <- with(presence, table(nafo_zone, month))
write.csv(nafo_by_mth, file = "../output/bio/nafozone_by_mth.csv")
nafo_by_mth
```

Interesting that there is a point in 1C - this is outside Canadian waters.... anyway
```{r}
onec <-  subset(presence, nafo_zone == "1C")
onec
```

## density plot with background and presence env. data by NAFO region

What I want to see if if there is a marked difference between the env. correlates of the presence points between NAFO regions. This is also to try deal with sampling bias (b/c the whole region is not uniformly sampled in each month, but rather nafo regions have a strong month bias)

first create NAFO-region datasets

```{r presence by nafo}
nafo0a <- subset(presence, nafo_zone == "0A")
nafo0b <- subset(presence, nafo_zone == "0B")
nafo1c <- subset(presence, nafo_zone == "1C")
nafo2g <- subset(presence, nafo_zone == "2G")
nafo2h <- subset(presence, nafo_zone == "2H")
nafo2j <- subset(presence, nafo_zone == "2J")
nafo3k <- subset(presence, nafo_zone == "3K")
nafo3l <- subset(presence, nafo_zone == "3L")
nafo3m <- subset(presence, nafo_zone == "3M")
nafo3n <- subset(presence, nafo_zone == "3N")
nafo3o <- subset(presence, nafo_zone == "3O")
nafo3ps <- subset(presence, nafo_zone == "3Ps")
nafo4r <- subset(presence, nafo_zone == "4R")
nafo4s <- subset(presence, nafo_zone == "4S")
nafo4t <- subset(presence, nafo_zone == "4T")
nafo4vn <- subset(presence, nafo_zone == "4Vn")
nafo4vs <- subset(presence, nafo_zone == "4Vs")
nafo4w <- subset(presence, nafo_zone == "4W")
nafo4x <- subset(presence, nafo_zone == "4X")
nafo5y <- subset(presence, nafo_zone == "5Y")
nafo5ze <- subset(presence, nafo_zone == "5ZE")
nafohudson <- subset(presence, nafo_zone == "HudsonStrait")
```

plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)
```{r presence by nafo by variable}
ggplot(nafo0a, aes(x = temp_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/temp_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = temp_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/temp_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = chl_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/chl_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = chl_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/chl_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = salinity_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/salinity_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = salinity_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/salinity_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = o2_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/o2_surface_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = o2_depth, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/o2_depth_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = mlp_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/mlp_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(nafo0a, aes(x = ssh_surface, colour = nafo_zone)) + geom_density(na.rm = TRUE) + geom_density(data=nafo0b, na.rm = TRUE) + geom_density(data=nafo2g, na.rm = TRUE) + geom_density(data=nafo2h, na.rm = TRUE) + geom_density(data=nafo2j, na.rm = TRUE) + geom_density(data=nafo3k, na.rm = TRUE) + geom_density(data=nafo3l, na.rm = TRUE) + geom_density(data=nafo3n, na.rm = TRUE) + geom_density(data=nafo3o, na.rm = TRUE) + geom_density(data=nafo3ps, na.rm = TRUE) + geom_density(data=nafo4r, na.rm = TRUE) + geom_density(data=nafo4s, na.rm = TRUE) + geom_density(data=nafo4t, na.rm = TRUE) + geom_density(data=nafo4vn, na.rm = TRUE) + geom_density(data=nafo4vs, na.rm = TRUE) + geom_density(data=nafo4w, na.rm = TRUE) + geom_density(data=nafo4x, na.rm = TRUE) + geom_density(data=nafo5y, na.rm = TRUE) + geom_density(data=nafohudson, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_nafo_plots/ssh_nafo.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```



Let's plot the variables by nafo region/year then by month


```{r}
pr98 <- subset(presence, year == "1998")
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
```

```{r}
pr98 <- subset(presence, year == "1998")
pr11 <- subset(presence, year == "2011")
par(mfrow=c(2,1))
boxplot(pr98$temp_depth ~ pr98$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
boxplot(pr11$temp_depth ~ pr11$nafo_zone, xlab = "NAFO Region", ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth per NAFO Zone")
```

ok scrap the nafo region analysis - it is too mixed up with month (so seeing if region or month is a factor that needs to be factored in to the model is not appropriate)

# remap who sampled

Now lets check the number of records and spatial-temporal distribution of the observations by institution code to make sure none are dodgy

first a table of how many observations each instituioncode has
```{r institution code analysis - count}
obs_by_ins <- count(presence, "institutioncode")
obs_by_ins
write.csv(obs_by_ins, file = "../output/bio/samplinginstitutions/no_observations_institutioncode.csv")
```
ok so IMR, NAFO, ICES, & USM have very few entries (1, 1, 1, and 3 respectively)

Lets take a look at the spatial breakdown of these institutions.First all points...

```{r}
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(38, 70), asp = 1, main = "All Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(presence$decimalLongitude, presence$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/all_occurrences.png") #this prints a png of the plot
dev.off() #this turns off the print command
```
Note there is one point up by iceland that you should get rid of (Icelandic population thought to be seperate from Labrador, but it is unclear if this is true or not).

Map the institutioncode == ARC only data...
```{r map obs by institutuion}
ARC_obs <- presence[presence$institutioncode == "ARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Arc Occurrences", col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ARC_obs$decimalLongitude, ARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOCENARC_obs <- presence[presence$institutioncode == "DFOCENARC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Central & Arctic Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOCENARC_obs$decimalLongitude, DFOCENARC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOCENARC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOGulf_obs <- presence[presence$institutioncode == "DFOGulf", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Gulf Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOGulf_obs$decimalLongitude, DFOGulf_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOGulf_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOISDM_obs <- presence[presence$institutioncode == "DFOISDM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO ISDM Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOISDM_obs$decimalLongitude, DFOISDM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOISDM_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOMTMS_obs <- presence[presence$institutioncode == "DFOMTMS", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Maritimes Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOMTMS_obs$decimalLongitude, DFOMTMS_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOMTMS_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFONL_obs <- presence[presence$institutioncode == "DFONL", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Newfouundland & Labrador Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFONL_obs$decimalLongitude, DFONL_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFONL_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

DFOQC_obs <- presence[presence$institutioncode == "DFOQC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "DFO Quebec Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(DFOQC_obs$decimalLongitude, DFOQC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/DFOQC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

MaineDMR_obs <- presence[presence$institutioncode == "MaineDMR", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Maine DMR Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(MaineDMR_obs$decimalLongitude, MaineDMR_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/MaineDMR_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

NEFSC_obs <- presence[presence$institutioncode == "NEFSC", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "NEFSC Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(NEFSC_obs$decimalLongitude, NEFSC_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/NEFSC_occurrences_all.png") #this prints a png of the plot
dev.off() #this turns off the print command

ROM_obs <- presence[presence$institutioncode == "ROM", ]
map2 <- getMap(resolution = "low") #creates an object called map at low resoultion
plot(map2, xlim = c(-70, -43), ylim =c(39, 70), asp = 1, main = "Royal Ontario Museum Occurrences",  col = "cornsilk") #the x and y lim are the long-lat bounds of the map
points(ROM_obs$decimalLongitude, ROM_obs$decimalLatitude, col = "red") #this adds points to the mapet", xlab = "Longitude", ylab = "Latitude")
dev.copy(png, "../output/bio/samplinginstitutions/ROM_occurrences_all.png") #this prints a png of the plot
```

# check for gear type

what are the unique gear types you have in your presence data, and how many?

```{r observation by cell - total}
gear_count <- count(presence, "gear")
write.csv(gear_count, file = "../output/bio/gear_count.csv")
gear_count
```

so the vast majority are trawls of some type. 

map the gear usage in Arcgis (gear_type_map)

create a table of gear use by month

```{r}
gearby_mth <- with(presence, table(month, gear))
write.csv(gearby_mth, file = "../output/bio/gear_by_mth.csv")
gearby_mth
```

What I want to see if if there is a marked difference between the env. correlates of the presence points between gear type used. This is also to try deal with detection bias between gear type (b/c the whole region is not uniformly sampled by the same gear type)

first create gear datasets

```{r presence by gear}
presence$gear <- as.character(presence$gear)
bottom_trawl <- subset(presence, gear == "bottom_trawl")
bottom_trawl_alfredo_03 <- subset(presence, gear == "bottom_trawl_alfredo_03")
bottom_trawl_campelen_14 <- subset(presence, gear == "bottom_trawl_campelen_14")
bottom_trawl_campelen_1800 <- subset(presence, gear == "bottom_trawl_campelen_1800")
bottom_trawl_campelen_21 <- subset(presence, gear == "bottom_trawl_campelen_21")
bottom_trawl_cosmos_2600 <- subset(presence, gear == "bottom_trawl_cosmos_2600")
bottom_trawl_western_IIA <- subset(presence, gear == "bottom_trawl_western_IIA")
unknown <- subset(presence, gear == "unknown")
vertical_plankton_tow <- subset(presence, gear == "vertical_plankton_tow")
```

plot by each variable, less 3m (2 samples) 1c (one sample) and 5ze (zero samples?!)
```{r presence by gear by variable}
ggplot(bottom_trawl, aes(x = temp_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/temp_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = temp_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/temp_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = chl_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/chl_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = chl_depth, colour = gear))  + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/chl_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = salinity_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/salinity_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = salinity_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/salinity_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = o2_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/o2_surface_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = o2_depth, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/o2_depth_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = mlp_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/mlp_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png

ggplot(bottom_trawl, aes(x = ssh_surface, colour = gear)) + geom_density(na.rm = TRUE) + geom_density(data=bottom_trawl_alfredo_03, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_14, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_1800, na.rm = TRUE) + geom_density(data=bottom_trawl_campelen_21, na.rm = TRUE) + geom_density(data=bottom_trawl_cosmos_2600, na.rm = TRUE) + geom_density(data=bottom_trawl_western_IIA, na.rm = TRUE) + geom_density(data=unknown, na.rm = TRUE) + geom_density(data=vertical_plankton_tow, na.rm = TRUE)
dev.copy(png,"../output/env/env_by_gear_plots/ssh_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```





```{r}
par(mar=c(7,5,1,1))
boxplot(presence$temp_depth ~ presence$gear, ylab = "Temperature (Kelvin)", main = "Temperature at Observation Depth by gear type", las = 2)
dev.copy(png, "../output/env/simple_plots/temperature_boxplot_dept_gear.png") # to automatically save the plot to a png AND show it inline
dev.off() # stops automatic saving of the plot to a png
```

looks like not much difference... what about a kruskal wallace test?

```{r}
kruskal.test(presence$temp_depth ~ presence$gear)
```
Ok so there is a statistically sig difference somewhere in the temp at depth reported by the gear type (Kruskal-Wallis chi-squared = 248.27, df = 7, p-value < 2.2e-16)

To see where the difference(s) are run a Dunn test Zar (2010) states that the Dunn test (in the FSA package) is appropriate for groups with unequal numbers of observations.(Zar, J.H. 2010. Biostatistical Analysis, 5th ed.  Pearson Prentice Hall: Upper Saddle River, NJ.) http://rcompanion.org/rcompanion/d_06.html

SAM WHEN DUNN IS INSTALLED, GO TO RCOMPANIONS.ORG AND LOOK FOR DUNN - CHECK CHROME HISTORY (PROBABLY UNDER KRUSKAL WALLIS) ALSO MAP THE DISTRIBUTION OF THESE GEAR TYPES
```{r}
pairwise.wilcox.test(presence$temp_depth, presence$gear, p.adj='bonferroni', exact=F)
```

dunn test 
```{r}
gear_temp_depthdunn <- dunnTest(presence$temp_depth, presence$gear, list = TRUE)
gear_temp_depthdunn



write.csv(table, "../output/env/gear_temp_depthdunn.csv", row.names = TRUE)
```


#vif

for this you need the joined dataset. this was created under mergig_datasets.src and the file is ../output/bio/presab.csv

```{r}
presab <- read.csv("../output/bio/presab.csv", header = TRUE)
```


```{r}
vif_allpresab <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allpresab, "../output/bio/vif_allpresab.csv", row.names = TRUE)
vif_allpresab
```
interpret - see https://stats.stackexchange.com/questions/70679/which-variance-inflation-factor-should-i-be-using-textgvif-or-textgvif/96584#96584
To make GVIFs comparable across dimensions, we suggested using GVIF^(1/(2*Df)), where Df is the number of coefficients in the subset (ref fox and motette 1992 in zotero)
or the 2 continuous variables, GVIFˆ(1/(2*Df)) (which is basically the square root of the VIF/GVIF value as DF=1) is the proportional change of the standard error and confidence interval of their coefficients due to the level of collinearity. The GVIFˆ(1/(2*Df)) value of the categorical variable is a similar measure for the reduction in precision of the coefficients' estimation due to collinearity. 
apparently i just need to square GVIF^(1/(2*Df)) and then use the normal VIF "rule of thumb"...

```{r}
vif_allpresab_sq <- read.csv("../output/bio/vif_allpresab.csv", header = TRUE)
vif_allpresab_sq$GVIF2Dfsq <- vif_allpresab_sq$GVIF..1..2.Df..^2
write.csv(vif_allpresab_sq, "../output/bio/vif_allpresab_sq.csv", row.names = TRUE)
vif_allpresab_sq
```

As per SLR suggestion, rerun without gear

```{r vif all but gear}
vif_allbutgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutgear, "../output/bio/vif_allbutgear.csv", row.names = TRUE)
vif_allbutgear
```

As per SLR suggestion, rerun without gear and most highly correlated variables 

```{r vif without gear + high corr}
vif_allbutgearhighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutgearhighlycorr, "../output/bio/vif_allbutgearhighlycorr.csv", row.names = TRUE)
vif_allbutgearhighlycorr
```


As per SLR suggestion, rerun with gear but without most highly correlated variables

```{r vif all but high corr}
vif_allbuthighlycorr <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter + gear, data = presab))
write.csv(vif_allbuthighlycorr, "../output/bio/vif_allbuthighlycorr.csv", row.names = TRUE)
vif_allbuthighlycorr

vif_allbuthighlycorr_sq <- read.csv("../output/bio/vif_allbuthighlycorr.csv", header = TRUE)
vif_allbuthighlycorr_sq$GVIF2Dfsq <- vif_allbuthighlycorr_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighlycorr_sq, "../output/bio/vif_allbuthighlycorr_sq.csv", row.names = TRUE)
vif_allbuthighlycorr_sq
```

ok remove one variable at a time - leave gear in

remove amo_prev
```{r vif all but amoprev}
vif_allbutamo_prev <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prev, "../output/bio/vif_allbutamo_prev.csv", row.names = TRUE)
vif_allbutamo_prev

vif_allbutamo_prev_sq <- read.csv("../output/bio/vif_allbutamo_prev.csv", header = TRUE)
vif_allbutamo_prev_sq$GVIF2Dfsq <- vif_allbutamo_prev_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutamo_prev_sq, "../output/bio/vif_allbutamo_prev_sq.csv", row.names = TRUE)
vif_allbutamo_prev_sq
```
and leave gear out

remove amo_prev + gear
```{r vif all but amoprev + gear}
vif_allbutamo_prevgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbutamo_prevgear, "../output/bio/vif_allbutamo_prevgear.csv", row.names = TRUE)
vif_allbutamo_prevgear
```

remove chl_depth
```{r vif all but chldepth}
vif_allbutchl_depth <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depth, "../output/bio/vif_allbutchl_depth.csv", row.names = TRUE)
vif_allbutchl_depth

vif_allbutchl_depth_sq <- read.csv("../output/bio/vif_allbutchl_depth.csv", header = TRUE)
vif_allbutchl_depth_sq$GVIF2Dfsq <- vif_allbutchl_depth_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutchl_depth_sq, "../output/bio/vif_allbutchl_depth_sq.csv", row.names = TRUE)
vif_allbutchl_depth_sq
```

remove chl_depth and gear
```{r vif all but chldepth+gear}
vif_allbutchl_depthgear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutchl_depthgear, "../output/bio/vif_allbutchl_depthgear.csv", row.names = TRUE)
vif_allbutchl_depthgear
```


remove ssh_surface
```{r vif all but ssh}
vif_allbutssh_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surface, "../output/bio/vif_allbutssh_surface.csv", row.names = TRUE)
vif_allbutssh_surface

vif_allbutssh_surface_sq <- read.csv("../output/bio/vif_allbutssh_surface.csv", header = TRUE)
vif_allbutssh_surface_sq$GVIF2Dfsq <- vif_allbutssh_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutssh_surface_sq, "../output/bio/vif_allbutssh_surface_sq.csv", row.names = TRUE)
vif_allbutssh_surface_sq
```

remove ssh_surface & gear
```{r vif all but ssh+gear}
vif_allbutssh_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutssh_surfacegear, "../output/bio/vif_allbutssh_surfacegear.csv", row.names = TRUE)
vif_allbutssh_surfacegear
```

remove o2_surface
```{r vif all but o2surface}
vif_allbuto2_surface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surface, "../output/bio/vif_allbuto2_surface.csv", row.names = TRUE)
vif_allbuto2_surface

vif_allbuto2_surface_sq <- read.csv("../output/bio/vif_allbuto2_surface.csv", header = TRUE)
vif_allbuto2_surface_sq$GVIF2Dfsq <- vif_allbuto2_surface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuto2_surface_sq, "../output/bio/vif_allbuto2_surface_sq.csv", row.names = TRUE)
vif_allbuto2_surface_sq
```

remove o2_surface & gear
```{r vif all but o2surface+gear}
vif_allbuto2_surfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuto2_surfacegear, "../output/bio/vif_allbuto2_surfacegear.csv", row.names = TRUE)
vif_allbuto2_surfacegear
```

as per SLR chat jan 23: 

remove salinity_surface only
```{r vif all but salsurface}
vif_allbutsalinitysurface <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurface, "../output/bio/vif_allbutsalinitysurface.csv", row.names = TRUE)
vif_allbutsalinitysurface

vif_allbutsalinitysurface_sq <- read.csv("../output/bio/vif_allbutsalinitysurface.csv", header = TRUE)
vif_allbutsalinitysurface_sq$GVIF2Dfsq <- vif_allbutsalinitysurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutsalinitysurface_sq, "../output/bio/vif_allbutsalinitysurface_sq.csv", row.names = TRUE)
vif_allbutsalinitysurface_sq
```
and without gear

```{r vif all but salsurface+gear}
vif_allbutsalinitysurfacegear <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbutsalinitysurfacegear, "../output/bio/vif_allbutsalinitysurfacegear.csv", row.names = TRUE)
vif_allbutsalinitysurfacegear
```

now remove temp_surface plus highly correlated
```{r vif allbut high corr + temp surface/gear/nogear}
#with gear
vif_allbuthighcorrtempsurface <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurface, "../output/bio/vif_allbuthighcorrtempsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface

vif_allbuthighcorrtempsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsurface.csv", header = TRUE)
vif_allbuthighcorrtempsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsurface_sq, "../output/bio/vif_allbuthighcorrtempsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsurface_sq

#without gear
vif_allbuthighcorrtempsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_surface + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsurfacegear, "../output/bio/vif_allbuthighcorrtempsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsurfacegear
```

now remove temp_surface plus salinity_surface
```{r vif allbut temp + salinity surface/gear/nogear}
#with gear
vif_allbuttempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurface, "../output/bio/vif_allbuttempsalsurface.csv", row.names = TRUE)
vif_allbuttempsalsurface

vif_allbuttempsalsurface_sq <- read.csv("../output/bio/vif_allbuttempsalsurface.csv", header = TRUE)
vif_allbuttempsalsurface_sq$GVIF2Dfsq <- vif_allbuttempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuttempsalsurface_sq, "../output/bio/vif_allbuttempsalsurface_sq.csv", row.names = TRUE)
vif_allbuttempsalsurface_sq

#without gear
vif_allbuttempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_prev + amo_winter, data = presab))
write.csv(vif_allbuttempsalsurfacegear, "../output/bio/vif_allbuttempsalsurfacegear.csv", row.names = TRUE)
vif_allbuttempsalsurfacegear
```

now remove temp_surface plus salinity_surface + highly correlated
```{r vif allbut high corr +  temp + salinity surface/gear/nogear}
#with gear
vif_allbuthighcorrtempsalsurface <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + gear + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurface, "../output/bio/vif_allbuthighcorrtempsalsurface.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface

vif_allbuthighcorrtempsalsurface_sq <- read.csv("../output/bio/vif_allbuthighcorrtempsalsurface.csv", header = TRUE)
vif_allbuthighcorrtempsalsurface_sq$GVIF2Dfsq <- vif_allbuthighcorrtempsalsurface_sq$GVIF..1..2.Df..^2
write.csv(vif_allbuthighcorrtempsalsurface_sq, "../output/bio/vif_allbuthighcorrtempsalsurface_sq.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurface_sq

#without gear
vif_allbuthighcorrtempsalsurfacegear <- vif(lm(occurrence ~ temp_depth + salinity_depth + o2_depth + chl_surface + bottom_depth + mlp_surface + nao_sample + nao_prev + nao_winter + amo_sample + amo_winter, data = presab))
write.csv(vif_allbuthighcorrtempsalsurfacegear, "../output/bio/vif_allbuthighcorrtempsalsurfacegear.csv", row.names = TRUE)
vif_allbuthighcorrtempsalsurfacegear
```

Just out of curiosity, a vif will all but but atmos drivers

```{r}
vif_allbutnaoamo <- vif(lm(occurrence ~ temp_surface + temp_depth + salinity_surface + salinity_depth + o2_surface + o2_depth + chl_surface + chl_depth + bottom_depth + mlp_surface + ssh_surface + gear, data = presab))
write.csv(vif_allbutnaoamo, "../output/bio/vif_allbutnaoamo.csv", row.names = TRUE)
vif_allbutnaoamo

vif_allbutnaoamo_sq <- read.csv("../output/bio/vif_allbutnaoamo.csv", header = TRUE)
vif_allbutnaoamo_sq$GVIF2Dfsq <- vif_allbutnaoamo_sq$GVIF..1..2.Df..^2
write.csv(vif_allbutnaoamo_sq, "../output/bio/vif_allbutnaoamo_sq.csv", row.names = TRUE)
vif_allbutnaoamo_sq
```

# check to see how many cells have >1 point per timeslice

this needs to be 3D so you need to create a cell_id_3d
```{r cell_id_3d}
presab$cell_id_3d <- paste(presab$cell_id, presab$depthlayerno, sep="_") #create a new column that is a unique cell_id & depth ID. 
head(presab)
```
and now do one with a timeslice label
```{r cell_id_xyzt}
presab$cell_id_xyzt <- paste(presab$cell_id_3d, presab$year, presab$month, sep="_") #create a new column that is a unique cell_id & depth ID. 
head(presab)
write.csv(presab, "../output/bio/presab_cellid_xyzt.csv", row.names = FALSE)
```


because background points can land in the same xyzt, subset the dataset to be only presences

```{r}
presencedup <- subset(presab, occurrence == "1")
nrow(presencedup)
```


the data is in presencedup$total_cell_obs_xyzt
```{r unique_pointpercell}
unique_pointpercell <- unique(presencedup$total_cell_obs_xyzt)
unique_pointpercell
```
bugger... ok so lets do a frequency table
```{r unique_pointpercell frequencies}
pointpercellcount <- count(celobsmore1, "total_cell_obs_xyzt")
pointpercellcount$no_cells <- pointpercellcount$freq / pointpercellcount$total_cell_obs_xyzt
write.csv(pointpercellcount, file = "../output/bio/pointpercellcount.csv")
pointpercellcount
```

so there are 949 3d timesliced cells that contain more than one occurrence, and 2042 rows. 
- different sampling days
- different coordinates
- risk of duplicate entries (but only if two instituions inputted the same data with slightly different coordinates and different reserch program names - see cell3985_18_2000_7 below as example)

```{r}
cell3985_18_2000_7  <- subset(celobsmore1, cell_id_xyzt == "3985_18_2000_7")
cell3985_18_2000_7 
```

Ideally you only want one observation cell cell_id_xyzt. First see if any are missing depth data

find the column numbers that correspond to the env. data (use fastmatch package as is quicker)

```{r}
fmatch("chl_surface", names(celobsmore1)) #28
fmatch("chl_depth", names(celobsmore1)) #29
fmatch("mlp_surface", names(celobsmore1)) #30
fmatch("o2_surface", names(celobsmore1)) #31
fmatch("o2_depth", names(celobsmore1)) #32
fmatch("salinity_surface", names(celobsmore1)) #33
fmatch("salinity_depth", names(celobsmore1)) #34
fmatch("ssh_surface", names(celobsmore1)) #35
fmatch("temp_surface", names(celobsmore1)) #36
fmatch("temp_depth", names(celobsmore1)) #37
```

then subset any with NA values

```{r celobsmore1_novals}
celobsmore1_novals <-  celobsmore1[!complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_novals) #just to check how many there are - 1095 out of 2042
```

ok and subset with all values

```{r celobsmore1_allval}
celobsmore1_allvals <- celobsmore1[complete.cases(celobsmore1[28:37]), ] #28:37 represent the columns w/ env. data
nrow(celobsmore1_allvals) #947 out of 2042
```

ok so how many unique cells do we have with complete data...

```{r}
pointpercellcount_alldata <- count(celobsmore1_allvals, "total_cell_obs_xyzt")
pointpercellcount_alldata$no_cells <- pointpercellcount_alldata$freq / pointpercellcount_alldata$total_cell_obs_xyzt
write.csv(pointpercellcount_alldata, file = "../output/bio/pointpercellcount_alldata.csv")
pointpercellcount_alldata
```

and see which cell_id_xyzt appear in both celobsmore1_allvals and celobsmore1_novals
```{r}
celobsmore1_allnovals <- inner_join(celobsmore1_allvals, celobsmore1_novals)
celobsmore1_allnovals 
```
None.... which is good. I'd expect observations in the same xyzt to have the same values

so, I can successfully reduce the observations down to one per xyzt

```{r}
fmatch("cell_id_xyzt", names(celobsmore1)) #46
duplicatesbegone <- celobsmore1[!duplicated(celobsmore1[ ,46]), ] #creates a df with all duplicates gone (now 1 obs per cell)
nrow(duplicatesbegone) #949 - good
presabnodup <- subset(presab, total_cell_obs_xyzt == "1") #remove all rows that have more than one obs
nrow(presabnodup) #1549351 - this adds up 
presab <- rbind(presabnodup, duplicatesbegone)
nrow(presab) #1550300 - great!

#recalculate the total_cell_obs_xyzt column

names(presab)[names(presab)=="total_cell_obs_xyzt"] <- "XXtotal_cell_obs_xyzt" #rename col
obs_cell_yymm_depth_dup <- count(presab, cell_id, year, month, depthlayerno)
test <- merge(presab, obs_cell_yymm_depth_dup, by.x=c("cell_id", "year", "month", "depthlayerno"), by.y=c("cell_id", "year", "month", "depthlayerno"))
names(test)[names(test)=="n"] <- "total_cell_obs_xyzt" #rename col

write.csv(presab, "../output/bio/presab_cellid_xyzt_nodup.csv")
rm(presabnodup, duplicatesbegone, celobsmore1, celobsmore1_novals, celobsmore1_allnovals, celobsmore1_allvals) #just because they are bigish variables
```



# monthly spearmans correlations and VIF

curious - does correlations/vif alter between months?

First split the dataset into monthly

```{r subset monthly presab}
janpresab <- subset(presab, month == "1")
febpresab <- subset(presab, month == "2")
marpresab <- subset(presab, month == "3")
aprpresab <- subset(presab, month == "4")
maypresab <- subset(presab, month == "5")
junpresab <- subset(presab, month == "6")
julpresab <- subset(presab, month == "7")
augpresab <- subset(presab, month == "8")
seppresab <- subset(presab, month == "9")
octpresab <- subset(presab, month == "10")
novpresab <- subset(presab, month == "11")
decpresab <- subset(presab, month == "12")
```

now get the background points for each month (for spearmans)
```{r monthly background points}
janback <- subset(janpresab, occurrence == "0")
febback <- subset(febpresab, occurrence == "0")
marback <- subset(marpresab, occurrence == "0")
aprback <- subset(aprpresab, occurrence == "0")
mayback <- subset(maypresab, occurrence == "0")
junback <- subset(junpresab, occurrence == "0")
julback <- subset(julpresab, occurrence == "0")
augback <- subset(augpresab, occurrence == "0")
sepback <- subset(seppresab, occurrence == "0")
octback <- subset(octpresab, occurrence == "0")
novback <- subset(novpresab, occurrence == "0")
decback <- subset(decpresab, occurrence == "0")
```

run the correlations on each month's background points

```{r monthly spearman correlations}
janback <- subset(janback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
janback_cor <- cor(janback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(janback_cor, "../output/env/janback_cor.csv", row.names = TRUE)

febback <- subset(febback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
febback_cor <- cor(febback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(febback_cor, "../output/env/febback_cor.csv", row.names = TRUE)

marback <- subset(marback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
marback_cor <- cor(marback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(marback_cor, "../output/env/marback_cor.csv", row.names = TRUE)

aprback <- subset(aprback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
aprback_cor <- cor(aprback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(aprback_cor, "../output/env/aprback_cor.csv", row.names = TRUE)

mayback <- subset(mayback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
mayback_cor <- cor(mayback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(mayback_cor, "../output/env/mayback_cor.csv", row.names = TRUE)

junback <- subset(junback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
junback_cor <- cor(junback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(junback_cor, "../output/env/junback_cor.csv", row.names = TRUE)

julback <- subset(julback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
julback_cor <- cor(julback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(julback_cor, "../output/env/julback_cor.csv", row.names = TRUE)

augback <- subset(augback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
augback_cor <- cor(augback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(augback_cor, "../output/env/augback_cor.csv", row.names = TRUE)

sepback <- subset(sepback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
sepback_cor <- cor(sepback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(sepback_cor, "../output/env/sepback_cor.csv", row.names = TRUE)

octback <- subset(octback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
octback_cor <- cor(octback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(octback_cor, "../output/env/octback_cor.csv", row.names = TRUE)

novback <- subset(novback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
novback_cor <- cor(novback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(novback_cor, "../output/env/novback_cor.csv", row.names = TRUE)

decback <- subset(decback, select = c(amo_sample, amo_winter, amo_prev, nao_sample, nao_winter, nao_prev, chl_surface, chl_depth, mlp_surface, ssh_surface, temp_surface, temp_depth, o2_surface, o2_depth, salinity_surface, salinity_depth))
decback_cor <- cor(decback, use="complete.obs", method = "spearman") #need to create a correlation using cor to plot. complete.obs means na.rm in this package. #using spearmans as suitable for non-linear relationships
write.csv(decback_cor, "../output/env/decback_cor.csv", row.names = TRUE)
```



#maxent?

```{r}
library("raster")
library("dismo")
library("rgeos")
```

